/*******************************************************************************
																				
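	DESCRIPTION: 	This do file runs all the Stata files in sequence. It stops
					(via "exit") at the two points where R scripts have to be
					run manually before the remaining steps can be executed.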
																									
*******************************************************************************/
* Start from a clean session so that nothing left over from an earlier run
* leaks into the paths and variable lists defined below.
* "clear all" does not remove macros, so the globals are dropped explicitly.
* The keyword for "every global macro" is _all; "macro drop all" would only
* target a macro that happens to be named "all".
clear all
macro drop _all
* Do not pause for --more-- prompts while the pipeline runs.
set more off
* Default graph scheme for the figures.
set scheme s2color

* Project root: the folder in which the "Programs" folder is placed.
global main "placeholder_main"
* Work from the project root, so that output saved without an explicit path
* still ends up in a sensible place.
cd "$main"

* Root folder of the raw data, followed by one global per raw source:
global raw_data "placeholder_raw_data"

global path_LISA  "$raw_data/LISA"
global wealthData "$raw_data/WEALTHR"
global path_PES   "$raw_data/PES"
global wages      "$raw_data/Wages"
global AKAS       "$raw_data/AKAS"
global UI_fund    "$raw_data/UI_fund"
global SILC       "$raw_data/SILC"
global IQData     "$raw_data/Enlistment_test"

* Folders that receive newly created data. The parent folder is created
* before its subfolders; "capture" suppresses any mkdir error (for example
* when the folder already exists).
global data              "$main/Data"
global data_intermediate "$data/Intermediate"
global tempfile          "$data/TempFiles"
capture mkdir "$data"
capture mkdir "$data_intermediate"
capture mkdir "$tempfile"

* Folder for output files:
global output "$main/Output"
capture mkdir "$output"

* Folder holding the code files:
global code "$main/Programs"

* Common sets of variables
* Each global below stores a space-separated list of variable names for one
* group of covariates. Entries ending in "*" are wildcard patterns, so they
* only resolve to actual variables once the macro is expanded in a place
* where a varlist is expected.
* NOTE(review): none of these globals is used in this file; presumably the
* do-files called below reference them -- confirm.
global demoEdu "Gender L_civilStatus* L_N_Kids* L_N_Kids_U18* L_Age_Youngest* age EducLevel* foreign*"
global migHist "citizenship* ySinceMigrat* migrationCohort*"
global mun "L_Municipality*"
global incHist "OtherInc_adj_L2_L5* OtherInc_L2_L5_adj0 WageInc_adj_L2_L5* WageInc_L2_L5_adj0 FamInc_adj_L2_L5* FamInc_L2_L5_adj0"
global incIndiv "L_WageInc_adj*"
global incOther "L_FamInc_adj* L_OtherInc_adj*"
global indu "L_Industry_3digit*"	
global emplHist "DaysOnDI_2Years* DaysOnDI_5Years* DaysUnemp_2Years* DaysUnemp_5Years* unemplSpells5Ybefore unemplSpells2Ybefore L_emplStatu* nEmployers5Y nEmployers5Y_Missing tenure* L_nEmployees_L1L2* L_firmSizeChange_L1L2* L_layoffRate_L1L2*" 
global wealth "L_NetWealth L_NetWealth0 L_Liabilities L_Liabilities0 L_BankAccount L_BankAccount0 L_RealEstate0 L_RealEstate"
global UI "L_additionalUI"
global wage "L1_monthlyWage L1_percenFullTimeNew L1_percenFullTimeMiss"
global occup "L_Occupation_3D_L1L2*"
global levelUI "replacRatio_new"
global IQ "cognit_dummy1 cognit_dummy2 cognit_dummy3 non_cognit_dummy1 non_cognit_dummy2 non_cognit_dummy3"	
global union "L_unionMember"

************************* 1. Data cleaning *************************************
* Runs the data-management do-files one after another. If one of them stops
* with an error, this master file stops at that point as well.
* Clean raw data:
* NOTE(review): no file numbered 001_6 is called here -- confirm that this is
* intentional.
do "${code}/Data Management/001_1_CleaningPES.do"
do "${code}/Data Management/001_2_CleaningLISA.do"
do "${code}/Data Management/001_3_CleaningULF.do"
do "${code}/Data Management/001_4_CleaningWealth.do"
do "${code}/Data Management/001_5_CleaningWages.do"
do "${code}/Data Management/001_7_CleaningUnemplBenefits.do"
do "${code}/Data Management/001_8_CleaningUnemplInsurance.do"
do "${code}/Data Management/001_10_CleaningIQ.do"
do "${code}/Data Management/001_11_CleaningSILC.do"
do "${code}/Data Management/001_12_CleaningMONICA.do"

* Merge everything:
* NOTE(review): 001_9 is run after 001_10 to 001_12 although its number is
* lower; presumably it needs all cleaned inputs -- confirm before reordering.
do "${code}/Data Management/001_9_MergingData.do"

* Export data for R:
do "${code}/Data Management/002_1_DataForR_byYear.do"
do "${code}/Data Management/002_2_DataForR_byYear_Expanded.do"
do "${code}/Data Management/002_3_DataForR_byYear_EmploymentHistory.do"
do "${code}/Data Management/002_4_DataForR_Pooled.do"

* Create an ALMP registry and use it to generate data for the model without ALMPs:
* (005 is therefore run before 002_5, out of numerical order.)
do "${code}/Data Management/005_UnemploymentCategoryStats.do"
do "${code}/Data Management/002_5_DataForR_NoTraining.do"


************************ 2. Model estimation ***********************************
* STOP AND RUN THE FOLLOWING R FILES MANUALLY:
* ("exit" inside a do-file ends the do-file at this point and returns control
* to the caller; the "clear" option permits exiting even if the data in memory
* have not been saved. Nothing below this line runs automatically: once the R
* scripts listed here have finished, the remaining commands have to be started
* separately, e.g. by running the lines after the exit as a selection.)
exit, clear
	* ${code}/Output Generation/104_1_caret_execution_byYear.R
	* ${code}/Output Generation/104_2_caret_execution_byYear_xMonthPred_yMonthModel.R
	* ${code}/Output Generation/104_3_caret_execution_byYear_YearIndividuals_TrainedOnOtherYears.R
	* ${code}/Output Generation/104_4_caret_execution_Pooled.R
	* ${code}/Output Generation/104_5_caret_execution_2006_OtherModels.R
	* ${code}/Output Generation/104_6_caret_execution_EmploymentHistory.R
	* ${code}/Output Generation/104_7_caret_execution_NoTraining.R
	* ${code}/Output Generation/104_8_caret_execution_ML_Robustness.R
	* ${code}/Output Generation/104_9_caret_execution_NoRecalls.R
	
* Clean predictions:
do "${code}/Data Management/003_1_CleaningPredictions_byYear.do"
do "${code}/Data Management/003_2_CleaningPredictions_byYear_xMonthPred_yMonthModel.do"
do "${code}/Data Management/003_3_CleaningPredictions_YearIndividuals_TrainedOnOtherYears.do"

* Other assorted cleaning:
do "${code}/Data Management/004_Combining_weights.do"
do "${code}/Output Generation/101_Unemployment_rate.do"

* Estimate linear models:
do "${code}/Output Generation/116_1_Linear_reference_model.do"
do "${code}/Output Generation/116_2_Linear_expanded_models.do"

* Run data preparation for cyclicality and duration dependence analysis:
do "${code}/Output Generation/114_1_Cyclicality_DataPreparation_beforeR.do"
do "${code}/Output Generation/119_1_DurationDependence_DataPreparation_beforeR.do"

* AGAIN, STOP AND RUN THE FOLLOWING R FILES MANUALLY:
* (Second manual checkpoint: as above, "exit" ends the do-file here, so the
* commands after it only run when they are started separately.)
exit, clear
	* ${code}/Output Generation/114_2_Cyclicality_IndividualRegression.R
	* ${code}/Output Generation/119_2_DurationDependence_IndividualRegressions.R
	
* Clean the R output:
do "${code}/Output Generation/114_3_Cyclicality_DataPreparation_afterR.do"
do "${code}/Output Generation/119_3_DurationDependence_DataPreparation_afterR.do"
	
	
*************************** 3. Output generation *******************************
* Runs the output-generation do-files one after another.
* NOTE: this section comes after the second "exit, clear" of section 2, so it
* is not reached when the master file is run from the top in one go; it has
* to be started separately once the manual R steps are done.

* Descriptives table:
do "${code}/Output Generation/106_BasicStatistics.do"

* ROC curves:
do "${code}/Output Generation/107_ROC_Curves_2006.do"

* Main statistics tables (covariance, R2, etc.):
do "${code}/Output Generation/108_1_MainStatistics_xMonthPred_yMonthModel.do"
do "${code}/Output Generation/108_2_MainStatistics_2006_DifferentJobFindingHorizons.do"
do "${code}/Output Generation/108_3_MainStatistics_2006_Robustness.do"

* Dynamic selection graph:
do "${code}/Output Generation/109_DynamicSelection.do"

* Explanatory power of variables tables:
do "${code}/Output Generation/111_1_Explanatory_power_of_variables.do"
do "${code}/Output Generation/111_2_Explanatory_power_of_variables_EmploymentHistory.do"
do "${code}/Output Generation/111_3_Explanatory_power_of_variables_Expanded.do"

* Histograms of predictions:
do "${code}/Output Generation/112_1_DistributionsGraphs_byYear_Full_vs_Basic.do"
do "${code}/Output Generation/112_2_DistributionsGraphs_byYear_2006_vs_2009.do"
do "${code}/Output Generation/112_3_DistributionsGraphs_byYear_DifferentJobFindingHorizons.do"
do "${code}/Output Generation/112_4_DistributionsGraphs_ALMPs.do"

* Scatter plots of predictions vs outcomes:
do "${code}/Output Generation/113_1_Scatterplot_empiricalJFR_on_predictedJFR.do"
do "${code}/Output Generation/113_2_Scatterplot_empiricalJFR_on_predictedJFR_LinearModel.do"
do "${code}/Output Generation/113_3_Scatterplot_empiricalJFR_on_predictedJFR_ML_Robustness.do"

* Cyclicality plots:
do "${code}/Output Generation/114_4_Cyclicality_Distributions.do"
do "${code}/Output Generation/114_5_Cyclicality_Heterogeneity_Visualization.do"
do "${code}/Output Generation/117_Cyclicality_R_squared.do"
do "${code}/Output Generation/128_DataPreparation_Compositional_Cyclicality.do"
do "${code}/Output Generation/128_Timelines_Compositional_Cyclicality.do"

* Regressions with survey data:
do "${code}/Output Generation/120_Regression_SILC_variables.do"

* Duration dependence plots:
do "${code}/Output Generation/119_4_DurationDependence_Distributions.do"
do "${code}/Output Generation/119_5_DurationDependence_Heterogeneity_Visualization.do"

* Bootstrapping the main statistics:
do "${code}/Output Generation/126_Bootstrapping.do"

* Other results:
do "${code}/Output Generation/133_Heterogeneity_Regressions.do"
do "${code}/Output Generation/134_1_TwoSpell_DataPreparation.do"
do "${code}/Output Generation/134_2_TwoSpell_Analysis.do"
do "${code}/Output Generation/134_3_TwoSpell_Table_Main.do"
do "${code}/Output Generation/134_4_TwoSpell_Table_LTU.do"
do "${code}/Output Generation/134_5_TwoSpell_Table_SplitByBetaD.do"
do "${code}/Output Generation/136_Prop_Hazard_Test.do"

* ML Robustness:
do "${code}/Output Generation/140_ML_Robustness_Weights.do"
do "${code}/Output Generation/141_ML_Robustness_Correlation.do"
do "${code}/Output Generation/142_ML_Robustness_Tuning.do"
